home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
...taking it to the Macs!
/
...taking it to the Macs!.iso
/
Extras
/
ActiveX Mac SDK
/
ActiveX SDK
/
Container Common
/
htparse.c
< prev
next >
Wrap
Text File
|
1997-01-03
|
11KB
|
483 lines
/*
This file was derived from the libwww code, version 2.15, from CERN.
A number of modifications have been made by Spyglass.
eric@spyglass.com
*/
/* Parse HyperText Document Address HTParse.c
** ================================
*/
#include "all.h"
//jjo
/* Forward declaration of x_ExpandRelativeAnchor(), which is defined at the
   end of this file.  The extern "C" wrapper gives it C linkage when the
   file is compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif
char *x_ExpandRelativeAnchor(const char *rel, const char *base);
#ifdef __cplusplus
}
#endif
/* The '%' character.  NOTE(review): the name suggests the URL hex-escape
   introducer, but nothing in the visible part of this file references it;
   confirm it is still needed. */
#define HEX_ESCAPE '%'
struct struct_parts
{
char *access;
char *host;
char *absolute;
char *relative;
/* char * search; no - treated as part of path */
char *anchor;
};
/* Strip white space off a string
** ------------------------------
**
** White space here means space, tab, newline and carriage return.
**
** On entry,
**	s	points to a writable zero terminated string, or is NULL.
** On exit,
**	Return value points to the first non-white character of s (to the
**	terminating zero if there is none), or is NULL if s was NULL.
**	All trailing white space is OVERWRITTEN with zero.
*/
PUBLIC char *HTStrip(char *s)
{
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
    char *end;

    if (!s)
        return NULL;            /* Doesn't dump core if NULL */

    end = s + strlen(s);        /* One past the last character */

    /* Zap trailing blanks.  The cursor is kept one PAST the character under
       test so that it never has to be moved below s, which would be
       undefined for an empty or all-blank string. */
    while (end > s && SPACE(end[-1]))
        *--end = 0;

    while (SPACE(*s))
        s++;                    /* Strip leading blanks */

    return s;
}
/* Scan a filename for its constituents
** ------------------------------------
**
** On entry,
**	name	points to a writable document name which may be incomplete.
**		It is cut up IN PLACE: separators are overwritten with zero.
**	parts	receives pointers into name.
** On exit,
**	absolute or relative may be nonzero (but not both).
**	host, anchor and access may be nonzero if they were specified.
**	Any which are nonzero point to zero terminated strings.
**
** For "file" names the apparent host is looked up as a mounted volume.
** If a volume of that name exists, the host is left as an empty string and
** the volume name becomes the first component of the absolute path.
*/
PRIVATE void scan(char *name, struct struct_parts *parts)
{
    char *after_access;
    char *p;
    size_t length = strlen(name);   /* measured before anything is cut */

    parts->access = 0;
    parts->host = 0;
    parts->absolute = 0;
    parts->relative = 0;
    parts->anchor = 0;

    /* Every ':' met before the first '/' or '#' ends the access name. */
    after_access = name;
    for (p = name; *p; p++)
    {
        if (*p == ':')
        {
            *p = 0;
            parts->access = name;   /* Access name has been specified */
            after_access = p + 1;
        }
        if (*p == '/')
            break;
        if (*p == '#')
            break;
    }

    /* Walk backwards over the ORIGINAL length, cutting at every '#'; the
       anchor ends up being the text after the first one.  The cursor is
       decremented inside the loop so it never moves below name. */
    for (p = name + length; p > name; )
    {
        p--;
        if (*p == '#')
        {
            parts->anchor = p + 1;
            *p = 0;                 /* terminate the rest */
        }
    }

    p = after_access;
    if (*p == '/')
    {
        if (p[1] == '/')
        {
            parts->host = p + 2;    /* host has been specified */
            *p = 0;                 /* Terminate access */
            p = strchr(parts->host, '/');   /* look for end of host name if any */

            /* If this is a "file" access, what appears to be a host may
               really be a volume.  access is 0 for names such as
               "//host/path", so it must be tested before it is compared. */
            if (parts->access && !strcmp(parts->access, "file"))
            {
                unsigned char vBuffer[256];     /* Pascal string: length byte + text */
                size_t host_len;

                /* p is 0 when nothing follows the host name. */
                host_len = p ? (size_t) (p - parts->host) : strlen(parts->host);

                /* Build the Mac style name "<host>:" directly.  Names that
                   do not fit a 255 character Pascal string are skipped
                   rather than overflowing the buffer. */
                if (host_len > 0 && host_len + 1 < sizeof(vBuffer))
                {
                    HParamBlockRec vParamBlock;

                    vBuffer[0] = (unsigned char) (host_len + 1);
                    memcpy(vBuffer + 1, parts->host, host_len);
                    vBuffer[host_len + 1] = ':';

                    /* Check whether the "host" name matches that of a
                       mounted volume.  NOTE(review): a negative ioVolIndex
                       is taken to mean "look up by ioNamePtr" - confirm
                       against Inside Macintosh: Files. */
                    memset(&vParamBlock, 0, sizeof(vParamBlock));
                    vParamBlock.volumeParam.ioNamePtr = vBuffer;
                    vParamBlock.volumeParam.ioVolIndex = -1;
                    if (!PBHGetVInfoSync(&vParamBlock))
                    {
                        /* It did; we will assume the "host" name is a
                           mistake.  Point at the second '/' of "//" so that
                           the termination below yields an empty host and an
                           absolute path that starts with the volume name. */
                        parts->host = after_access + 1;
                        p = parts->host;
                    }
                }
            }

            if (p)
            {
                *p = 0;                     /* Terminate host */
                parts->absolute = p + 1;    /* Root has been found */
            }
        }
        else
        {
            parts->absolute = p + 1;        /* Root found but no host */
        }
    }
    else
    {
        parts->relative = (*after_access) ? after_access : 0;   /* zero for "" */
    }
}   /*scan */
/* Parse a Name relative to another name
** -------------------------------------
**
** This returns those parts of a name which are given (and requested)
** substituting bits from the related name where necessary.
**
** On entry,
** aName A filename given
** relatedName A name relative to which aName is to be parsed
** wanted A mask for the bits which are wanted.
**
** On exit,
** returns A pointer to a malloc'd string which MUST BE FREED
*/
char *HTParse(const char *aName, const char *relatedName, int wanted)
{
char *return_value = 0;
char *p;
char *access;
struct struct_parts given, related;
char name[MAX_URL_STRING+1];
char rel[MAX_URL_STRING+1];
char result[2*MAX_URL_STRING+1]; /* Make this longer to avoid overflow */
/* Make working copies of input strings to cut up:
*/
GTR_strncpy(name, aName, MAX_URL_STRING);
GTR_strncpy(rel, relatedName, MAX_URL_STRING);
scan(name, &given);
scan(rel, &related);
/*
For the given part, if we get a URL which contains a protocol and a host,
but not an absolute, then it looked something like this:
http://www.spyglass.com:4040
We need to assume that the slash at the end should be there, or when this
is found as a hyperlink in a document, it will steal the absolute part
from the URL of the document itself (related).
*/
if (given.access && given.host && !given.absolute)
{
GTR_strncpy(name, aName, MAX_URL_STRING);
strcat(name, "/");
scan(name, &given);
}
result[0] = 0; /* Clear string */
access = given.access ? given.access : related.access;
if (wanted & PARSE_ACCESS)
if (access)
{
strcat(result, access);
if (wanted & PARSE_PUNCTUATION)
strcat(result, ":");
}
if (given.access && related.access) /* If different, inherit nothing. */
if (strcmp(given.access, related.access) != 0)
{
related.host = 0;
related.absolute = 0;
related.relative = 0;
related.anchor = 0;
}
if (wanted & PARSE_HOST)
if (given.host || related.host)
{
char *tail = result + strlen(result);
if (wanted & PARSE_PUNCTUATION)
strcat(result, "//");
if (given.host)
{
strcat(result, given.host);
}
else
{
strcat(result, related.host);
}
/* Ignore default port numbers, and trailing dots on FQDNs
which will only cause identical adreesses to look different */
{
char *p;
p = strchr(tail, ':');
if (p && access)
{ /* Port specified */
if ( ( strcmp(access, "http") == 0
&& strcmp(p, ":80") == 0)
|| ( strcmp(access, "gopher") == 0
&& strcmp(p, ":70") == 0)
#ifdef SHTTP_ACCESS_TYPE
|| ( strcmp(access, "shttp") == 0
&& strcmp(p, ":80") == 0)
#endif
)
*p = (char) 0; /* It is the default: ignore it */
}
if (!p)
p = tail + strlen(tail); /* After hostname */
if (strlen (p)) /* -dpg */
{
p--; /* End of hostname */
if (*p == '.')
*p = (char) 0; /* chop final . */
}
}
}
if (given.host && related.host) /* If different hosts, inherit no path. */
if (strcmp(given.host, related.host) != 0)
{
related.absolute = 0;
related.relative = 0;
related.anchor = 0;
}
if (wanted & PARSE_PATH)
{
if (given.absolute)
{ /* All is given */
if (wanted & PARSE_PUNCTUATION)
strcat(result, "/");
strcat(result, given.absolute);
}
else if (related.absolute)
{ /* Adopt path not name */
strcat(result, "/");
strcat(result, related.absolute);
if (given.relative)
{
p = strchr(result, '?'); /* Search part? */
if (!p)
p = result + strlen(result) - 1;
for (; *p != '/'; p--) ; /* last / */
p[1] = 0; /* Remove filename */
strcat(result, given.relative); /* Add given one */
HTSimplify(result);
}
}
else if (given.relative)
{
/* The following 3 lines were copied from NCSA Mosaic for Windows */
if ((wanted & PARSE_HOST) && (given.host || related.host) && (wanted & PARSE_PUNCTUATION))
if (result[strlen(result) - 1] != '/')
strcat(result, "/");
strcat(result, given.relative); /* what we've got */
}
else if (related.relative)
{
strcat(result, related.relative);
}
else
{ /* No inheritance */
if (!strcmp(result, "mailto:")) // mailto:
;
else if (!strcmp(result, "news:"))
;
else // protocol ends with a slash
strcat(result, "/");
}
}
if (wanted & PARSE_ANCHOR)
if (given.anchor || related.anchor)
{
if (wanted & PARSE_PUNCTUATION)
strcat(result, "#");
strcat(result, given.anchor ? given.anchor : related.anchor);
}
/* We truncate URLs to 1024 bytes if they're too long. */
result[MAX_URL_STRING] = '\0';
return_value = GTR_strdup(result);
return return_value; /* exactly the right length */
}
/*
**	As strcpy() but guaranteed to work correctly
**	with overlapping parameters.	AL 7 Feb 1994
**
**	Does nothing if either pointer is NULL.  The copy, including the
**	terminating zero, is done with memmove(), which is defined for
**	overlapping regions.  No temporary buffer is allocated, so the copy
**	can no longer be skipped because an allocation failed.
*/
PRIVATE void ari_strcpy(char *to, char *from)
{
    if (!to || !from)
        return;
    memmove(to, from, strlen(from) + 1);
}
/* Simplify a filename
** -------------------
**
** A unix-style file is allowed to contain the sequence xxx/../ which may be
** replaced by "" , and the sequence "/./" which may be replaced by "/".
** Simplification helps us recognize duplicate filenames.
**
**	Thus,	/etc/junk/../fred	becomes	/etc/fred
**		/etc/junk/./fred	becomes	/etc/junk/fred
**
** A "/.." that directly follows the host part cannot go any higher and is
** simply dropped:
**		http://host/../path	becomes	http://host/path
**
** but we do NOT change a name whose ".." has no earlier component to
** cancel, such as
**		../../albert.html
**
** On entry,
**	filename	points to a writable zero terminated string, or is
**			NULL (in which case nothing happens).
** On exit,
**	filename	has been simplified in place; it never gets longer.
*/
PUBLIC void HTSimplify(char *filename)
{
    char *p;

    if (!filename)
        return;

    p = filename;
    while (*p == '/' || *p == '.')      /* Pass starting / or .'s */
        p++;

    while (*p)
    {
        if (*p == '/')
        {
            if (p[1] == '.' && p[2] == '.' && (p[3] == '/' || !p[3]))
            {
                char *prev = NULL;      /* previous slash, if there is one */
                char *q;

                /* Search backwards without stepping below filename. */
                for (q = p; q > filename; q--)
                {
                    if (q[-1] == '/')
                    {
                        prev = q - 1;
                        break;
                    }
                }

                if (prev)
                {
                    /* True when prev is the second slash of a "//", i.e.
                       the one in front of the host name:
                           http://host.somewhere.com/../path
                                 ^                  ^
                                 prev               p               */
                    int after_host = (prev > filename + 1 && prev[-1] == '/');

                    if (!after_host && strncmp(prev, "/../", 4) != 0)
                    {
                        /* Remove /xxx/.. */
                        memmove(prev, p + 3, strlen(p + 3) + 1);
                        if (!*filename)
                            strcpy(filename, "/");
                        p = prev;       /* Start again with prev slash */
                        continue;
                    }
                    if (after_host)
                    {
                        /* Nothing above the root: just drop the "/..".
                           The slash now under p is examined again so that
                           a run of "/../.." is removed completely. */
                        memmove(p, p + 3, strlen(p + 3) + 1);
                        continue;
                    }
                }
                /* Otherwise xxx/.. has nothing to cancel: leave it. */
            }
            else if (p[1] == '.' && (p[2] == '/' || !p[2]))
            {
                /* Remove a slash and a dot, then look at the same position
                   again so that "/././" and "/./../" are fully handled. */
                memmove(p, p + 2, strlen(p + 2) + 1);
                continue;
            }
        }
        p++;
    }
}
/* Expand an anchor relative to a base name			(from html.c)
** ----------------------------------------
**
** On entry,
**	rel	the anchor as written; NULL is treated as "".
**	base	the name it is relative to, or NULL.
** On exit,
**	returns	a newly allocated string, or NULL if the working copy of
**		rel could not be allocated.
**		With a NULL base this is a plain copy of rel (NOT stripped
**		of white space); otherwise it is the result of HTParse() on
**		the stripped rel and base with every part and the
**		punctuation requested.
**		NOTE(review): presumably the caller releases it with
**		GTR_FREE() - confirm against the callers.
*/
char *x_ExpandRelativeAnchor(const char *rel, const char *base)
{
    char *copy;
    char *expanded;

    if (!rel)
        rel = "";

    /* HTStrip() writes into its argument, so work on a private copy. */
    copy = GTR_strdup(rel);

    /* No base: hand back the copy as it is.  Failed copy: hand back NULL
       instead of passing a NULL name on to HTParse(). */
    if (!copy || !base)
        return copy;

    expanded = HTParse(HTStrip(copy), base,
                       PARSE_PUNCTUATION | PARSE_ACCESS | PARSE_HOST | PARSE_PATH | PARSE_ANCHOR);
    GTR_FREE(copy);
    return expanded;
}